# ------------------------------------------------------------------------------
# Replication Materials
# 
# title: Eliciting Beliefs as Distributions in Online Surveys
# journal: Political Analysis
# authors: Lucas Leemann, Richard Traunmüller, and Lukas Stoetzer
# date: August 2020
# ------------------------------------------------------------------------------




### R code Data cleaner

# Read the raw survey export (comma-separated, first line holds column names).
data <- read.csv(
  file = "in/Prior_Elicitation_1803_symmetric.csv",
  header = TRUE,
  sep = ","
)

# Discard the first two data rows of the export.
# NOTE(review): presumably extra header/metadata rows written by the survey
# platform -- confirm against the raw file.
data <- data[-(1:2), ]

### Median Question
# Raw answer distributions of the four median-question items.
table(data$Q80)
table(data$Q95)
table(data$Q97)
table(data$Q139)

# Empty-string answers are recoded as missing.
# (The droplevels() calls that used to follow each recode stay disabled.)
is.na(data$Q80) <- data$Q80 == ""
is.na(data$Q95) <- data$Q95 == ""
is.na(data$Q97) <- data$Q97 == ""
is.na(data$Q139) <- data$Q139 == ""


### Quantile Question Wide
# Raw answer distributions of the three wide-quantile items.
table(data$Q115)
table(data$Q116_1)
table(data$Q117_1)

# Empty-string answers are recoded as missing.
# (The droplevels() calls that used to follow each recode stay disabled.)
is.na(data$Q115) <- data$Q115 == ""
is.na(data$Q116_1) <- data$Q116_1 == ""
is.na(data$Q117_1) <- data$Q117_1 == ""


### Quantile Question Narrow
# Raw answer distributions of the three narrow-quantile items.
table(data$Q132)
table(data$Q290_4)
table(data$Q291_1)

# Empty-string answers are recoded as missing.
# (The droplevels() calls that used to follow each recode stay disabled.)
is.na(data$Q132) <- data$Q132 == ""
is.na(data$Q290_4) <- data$Q290_4 == ""
is.na(data$Q291_1) <- data$Q291_1 == ""


### Manski Question
# Raw answer distributions of the five Manski-question items.
table(data$Q121)
table(data$Q122)
table(data$Q123)
table(data$Q125_1)
table(data$Q126_1)

# Empty-string answers are recoded as missing.
# (The droplevels() calls that used to follow each recode stay disabled.)
is.na(data$Q121) <- data$Q121 == ""
is.na(data$Q122) <- data$Q122 == ""
is.na(data$Q123) <- data$Q123 == ""
is.na(data$Q125_1) <- data$Q125_1 == ""
is.na(data$Q126_1) <- data$Q126_1 == ""

### Visual Bin
# q1 stores one respondent's bin answer as a single comma-separated string.
# This section tabulates the raw strings, recodes blanks as missing, and
# spreads complete answers (exactly 12 values) over 12 numeric columns.
table(data$q1)
data$q1[data$q1 == ""] <- NA
# droplevels() on q1 stays disabled, as before.

# Output columns, in the order in which the values appear inside q1.
bin_cols <- paste0(
  "q1_",
  c("2024", "2529", "3034", "3539", "4044", "4549",
    "5054", "5559", "6064", "6569", "7074", "7579")
)
n_bins <- length(bin_cols)

# Split every answer exactly once; as.character() keeps this working when q1
# was read in as a factor.
q1_parts <- strsplit(as.character(data$q1), ",")

# Only answers with one value per bin are used. Missing or malformed answers
# keep NA in all bin columns.
q1_complete <- lengths(q1_parts) == n_bins

# Fill a numeric matrix (rows = respondents, columns = bins) in one step
# instead of assigning data-frame cells row by row.
# Note: the bin columns are always numeric now, even when no answer is
# complete (they used to stay logical NA in that case).
bin_values <- matrix(NA_real_, nrow = nrow(data), ncol = n_bins)
if (any(q1_complete)) {
  bin_values[q1_complete, ] <- matrix(
    as.numeric(unlist(q1_parts[q1_complete])),
    ncol = n_bins,
    byrow = TRUE
  )
}

for (k in seq_len(n_bins)) {
  data[[bin_cols[k]]] <- bin_values[, k]
}

# Remove the temporaries; all of them are guaranteed to exist at this point,
# so rm() cannot warn about a missing object.
rm(bin_cols, n_bins, q1_parts, q1_complete, bin_values, k)

### Screener Questions
# Raw answer distributions of the two screener items.
# (droplevels() on Q135 / Q127 stays disabled.)
table(data$Q135)
table(data$Q127)

# Indicator per screener: 1 if the answer equals the target string, 0 for any
# other answer, NA where the answer itself is NA.
data$screen1 <- ifelse(
  test = data$Q135 == "Purple and yellow",
  yes = 1,
  no = 0
)
data$screen2 <- ifelse(
  test = data$Q127 == "University of Zurich",
  yes = 1,
  no = 0
)

# Relative frequencies of the two indicators.
prop.table(table(data$screen1))
prop.table(table(data$screen2))

### Timing Variable

table(data$Duration..in.seconds.)

# Numeric completion time; going through as.character() also handles the case
# where the column was read in as a factor.
# (droplevels() on the duration column stays disabled.)
data$time <- as.numeric(as.character(data$Duration..in.seconds.))

# Treatment label, derived from which question block has a non-missing first
# item. The assignments run in sequence, so a later match overwrites an
# earlier one.
data$treatment <- NA
data$treatment[!is.na(data$Q80)] <- "Median"
data$treatment[!is.na(data$Q115)] <- "Q.Wide"
data$treatment[!is.na(data$Q132)] <- "Q.Narrow"
data$treatment[!is.na(data$Q121)] <- "Manski"
data$treatment[!is.na(data$q1)] <- "Bins"
data$treatment <- factor(
  data$treatment,
  levels = c("Manski", "Median", "Q.Wide", "Q.Narrow", "Bins")
)

table(data$treatment)

# Mean completion time per treatment (missing times ignored).
time.means <- aggregate(
  x = data$time,
  by = list(treatment = data$treatment),
  FUN = mean,
  na.rm = TRUE
)

# Tag the experiment and keep a slim copy with the timing variables only.
data$experiment <- "sym_small"
data.sym.small <- data[, c("time", "treatment", "experiment")]


# Linear model of completion time on treatment, with its coefficient summary
# and ANOVA table.
m <- lm(time ~ treatment, data = data)
summary(m)
anova(m)

# Median and standard deviation of completion time per treatment
# (missing times ignored).
time.median <- aggregate(
  data$time,
  by = list(treatment = data$treatment),
  FUN = median,
  na.rm = TRUE
)
time.sd <- aggregate(
  data$time,
  by = list(treatment = data$treatment),
  FUN = sd,
  na.rm = TRUE
)

# This script never attaches the xtable package, so a bare xtable() call fails
# with "could not find function" when the file is run on its own. Calling it
# through its namespace works whether or not the package is attached (it still
# has to be installed).
xtable::xtable(time.median)

###  Analysis of Timing

# time.dat <- rbind(data.trump, data.asym.large, data.asym.small, data.sym.large, data.sym.small)
# head(time.dat)
# 
# ### Experiments
# time.median <- aggregate(time.dat$time[time.dat$experiment!="Trump"], by=list(treatment=time.dat$treatment[time.dat$experiment!="Trump"]), median, na.rm=T)
# time.median
# xtable(time.median)
# 
# m <- lm(time ~ treatment, data=time.dat[time.dat$experiment!="Trump",])
# summary(m)
# anova(m)
# xtable(anova(m))
# 
# ### Trump Vote 2020
# time.median <- aggregate(time.dat$time[time.dat$experiment=="Trump"], by=list(treatment=time.dat$treatment[time.dat$experiment=="Trump"]), median, na.rm=T)
# time.median
# xtable(time.median)
# 
# m <- lm(time ~ treatment, data=time.dat[time.dat$experiment=="Trump",])
# summary(m)
# anova(m)
# xtable(anova(m))